/* #############################################################################

TITLE: 			IRP - MIGRATION PROJECT 
AUTHOR: 		MICHAEL BROTTRAGER
LAST CHECKED: 	18.01.2021
DECADE DATA: Özden et al. 2011 as in PERI/CATTANEO
GRIDDED DATA: Gridded Population of the World (GPW) 
############################################################################## */

*install necessary packages
/*
cap ssc install spwmatrix
cap ssc install xtbalance
cap ssc install outreg2
cap ssc install moremata
cap ssc install spmap
cap net install dm88_1.pkg
cap ssc install xsmle
cap ssc install reg2hdfe
*/

*global terminal "put your diretory here"
global terminal "/Volumes/Seagate Portabl/WORK/SUSSEX/Replication"
global data "${terminal}/data"
global temp "${terminal}/temp"
global output "${terminal}/output"
global dofiles "${terminal}/dofiles"
global logs "${terminal}/logs"
cap mkdir "${output}"
cap mkdir "${logs}"
cap mkdir "${temp}"

adopath + "${terminal}/code/ado/"

*===============================================================================
* INTERNATIONAL MIGRATION ESTIMATIONS
*===============================================================================

use "${data}/ozden.dta", clear
keep if !missing(wb_et)
drop guy*

destring ashare, replace

gen lnflow = log(wb_et/pop)
encode iso3c, g(ID)	
* get the 1990 value quartiles *
foreach var of varlist Crop rgdpe Coal Ores Petrol Gas ashare { 
	preserve
		bys ID (year): keep if _n == 1 
		xtile init = log(`var'+1), nq(4)
		keep ID  init
		*tab init, gen(xtile_`var')
		rename init q_`var'
		keep ID q_`var'
		tempfile init
		save `init',replace
	restore

	joinby ID using `init', unmatched(master)
	drop _merge
	}


gen t_i = cspei-cspei_10
keep if !missing(rgdpe)
bysort ID: keep if _N == 3
keep if continent == "Africa"
xtset ID year

tempfile A
save `A'
clear

clear
tempfile B
save `B', emptyok
foreach NUM of numlist 2(1)10 {
	use `A', clear
		*local NUM = 6
		qui: encode region23, g(R)
		qui: gen t = cspei_`NUM'-cspei_20
		qui: tab q_ashare, g(x_)
		qui: for var x_*: gen t_X =t * X
		gen neg = cspei_`NUM'<0
		xtreg lnflow  t_x_*  c.cspei_20 i.R#i.year , fe cluster(ID)
		preserve
			predict fitted
			predict resids, residuals
			keep iso3c year lnflow fitted resids
			save "${output}/ESTINTAG_fitresid.dta", replace
			export delimited using "${output}/ESTINTAG_fitresid.csv", replace

		restore
		foreach p in 1 2 3 4 {
			gen beta_`p' = _b[t_x_`p']
			gen sd_`p' = _se[t_x_`p']
			gen lb_`p' = _b[t_x_`p'] - invttail(e(df_r),0.025)*_se[t_x_`p']
			gen ub_`p' = _b[t_x_`p'] + invttail(e(df_r),0.025)*_se[t_x_`p']
		}
		di _b[t_x_4]
		qui: keep beta* sd* lb* ub*
		qui: keep if _n == 1
		qui: gen mod = `NUM'
		qui: order mod beta* sd*
		qui: append using `B'
		qui: sort mod
		qui: save `B', replace
}

use `B', replace
save  "${output}/ESTINTAG.dta", replace
export delimited using "${output}/ESTINTAG_fitresid.csv", replace


clear
tempfile B
save `B', emptyok
foreach NUM of numlist 2(1)10 {
	use `A', clear
		qui: encode region23, g(R)
		qui: gen t = cspei_`NUM'-cspei_20
		qui: tab q_rgdpe, g(x_)
		qui: for var x_*: gen t_X =t * X
		gen neg = cspei_`NUM'<0
		xtreg lnflow  t_x_*  c.cspei_20 i.R#i.year , fe cluster(ID)
		preserve
			predict fitted
			predict resids, residuals
			keep iso3c year lnflow fitted resids
			save "${output}/ESTINTGDP_fitresid.dta", replace
			export delimited using "${output}/ESTINTGDP_fitresid.csv", replace
			
		restore
		foreach p in 1 2 3 4 {
			gen beta_`p' = _b[t_x_`p']
			gen sd_`p' = _se[t_x_`p']
			gen lb_`p' = _b[t_x_`p'] - invttail(e(df_r),0.025)*_se[t_x_`p']
			gen ub_`p' = _b[t_x_`p'] + invttail(e(df_r),0.025)*_se[t_x_`p']
		}
		
		
		
		di _b[t_x_4]
		qui: keep beta* sd* lb* ub*
		qui: keep if _n == 1
		qui: gen mod = `NUM'
		qui: order mod beta* sd*
		qui: append using `B'
		qui: sort mod
		qui: save `B', replace
}

use `B', replace
save  "${output}/ESTINTGDP.dta", replace
export delimited using "${output}/ESTINTGDP.csv", replace

twoway scatter beta_1 ub_1 lb_1 mod
twoway scatter beta_2 ub_2 lb_2 mod
twoway scatter beta_3 ub_3 lb_3 mod
twoway scatter beta_4 ub_4 lb_4 mod


*===============================================================================
* GRID-LEVEL ESTIMATIONS
*===============================================================================

/* Notes: 
- Estimation requires my_ols_spatial_HAC, kindly provided by Nicolas Berman, Mathieu Couttenier, Dominic Rohner, and Mathias Thoenig,
amended version of original ado file ols_spatial_hac by Solomon Hsiang.
- Spatial regressions are run with the xsmle command (by Federico Belotti, Gordon Hughes, Andrea Piano Mortari), September 2016 version - included in "dofiles/ado" file folder.
*/


* load data *
use "${data}/gridded.dta", clear
keep if continent == "Africa"

/* Cleaning */
bysort gid year : drop if _N > 1

/* Rename */
rename pop_gpw		pop
rename sdg_degr 	land_degradation
rename carb_degr 	land_degradation2
rename luc_degr 	land_degradation3
rename prod_degr	land_degradation4

/* Scaling */
foreach var of varlist land_degradation land_degradation2 land_degradation3 land_degradation4 {
replace `var' = round(`var', 0.00001)
}

foreach var of varlist droughtyr* {
	replace `var' = 0 if `var' == .
}

cap drop t
gen t = land_degradation if year == 2000
bys gid : egen land_degradation2000 = max(t)
cap drop t

* drought variables *
rename (droughtyr_speibase droughtyr_spi droughtcrop_speibase droughtcrop_spi drought) ///
		(DSPEIP DSPIP DCSPEIP DCSPIP DSPE)

foreach var of varlist DSPEIP DSPIP DCSPEIP DCSPIP DSPE {
	bys gid : egen `var'_9500 = sum(`var') if year > 1998 & year < 2001
	bys gid : egen `var'_0005 = sum(`var') if year > 2003 & year < 2006
	bys gid : egen `var'_0510 = sum(`var') if year > 2008 & year < 2011
	bys gid : egen `var'_1015 = sum(`var') if year > 2013 & year < 2016
}

foreach var of varlist *_9500 *_0005 *_0510 *_1015 {
	cap drop max
	bys gid : egen max = max(`var')
	replace `var' = max
}

foreach var of varlist DSPEIP DSPIP DCSPEIP DCSPIP DSPE {
	gen i_`var' = `var'_9500 if year == 2000
	replace i_`var' = `var'_0005 if year == 2005
	replace i_`var' = `var'_0510 if year == 2010
	replace i_`var' = `var'_1015 if year == 2015
}

drop *_9500 *_0005 *_0510 *_1015



sort gid year
save "$output/tmp", replace


/* Calcul variation pop*/
use "$output/tmp", clear

foreach var of varlist petroleum_s diamsec_s diamprim_s goldplacer_s goldvein_s gem_s {
replace `var' = 0 if `var' == .
}

replace ttime_mean = ln(ttime_mean)

*gen Ddpop = (dpop>0)
*replace Ddpop = . if dpop == .

*gen ldpop = ln(dpop)
gen lpop 			= ln(1+pop)
gen lpop_world_pop 	= ln(1+pop_world_pop)

rename ycoord latitude
rename xcoord longitude

/* selection */
tsset gid year

/* Level FE */
egen id_iso = group(iso3)
egen id_iso_year = group(id_iso year)
egen id_gid	= group(gid)

/* Re-scale*/
*replace dpop	= dpop / 1000

sort gid year
save "$output/tmp", replace
keep if year == 2000 | year == 2005 | year == 2010 | year == 2015
cap log close
qui : tab id_iso, g(Iiso_)
qui : tab year, g(Iyear_)

cap log close


***************************************************************
*************************** TABLES ***************************
***************************************************************
log using "$logs/results_drought.log", replace

/* Baseline estimates */
* i_DSPEIP i_DSPIP i_DCSPEIP i_DCSPIP i_DSPE
* correlation

* table 1
eststo m1: reg lpop i_DSPEIP , cluster(id_iso)
eststo m2: reghdfe lpop i_DSPEIP , absorb(id_gid) cluster(id_iso)
eststo m3: reghdfe lpop i_DSPEIP , absorb(id_gid year) cluster(id_iso)
eststo m4: reg2hdfespatial  lpop i_DSPEIP   , timevar(year) panelvar(id_gid) lat(latitude) lon(longitude) distcutoff(500) lagcutoff(100000) 
eststo m5: reg2hdfespatial  lpop land_degradation2   , timevar(year) panelvar(id_gid) lat(latitude) lon(longitude) distcutoff(500) lagcutoff(100000)  
 
set linesize 250

esttab m1 m2 m3 m4 m5, title(Baseline Estimation Results) ///
		nonumbers mtitles("Drought" "Drought" "Drought" "Drought" "Soil Degradation") ///
		b(%5.3f) se(%5.3f) r2  starlevels({$^*$} 0.1 {$^**$} 0.05 {$^***$} 0.01) se  label noconst booktabs

eststo clear

/* Results by intensity: visual representation */
* table 2
gen Iq_drought_1 = (i_DSPEIP< 0.1)
gen Iq_drought_2 = (i_DSPEIP> 0.1 & i_DSPEIP< 0.2)
gen Iq_drought_3 = (i_DSPEIP> 0.19 & i_DSPEIP< 0.5)
gen Iq_drought_4 = (i_DSPEIP> 0.49 & i_DSPEIP != .)

reg2hdfespatial  lpop Iq_drought_2 - Iq_drought_4 , timevar(year) panelvar(id_gid) lat(latitude) lon(longitude) distcutoff(500) lagcutoff(100000) 

foreach num of numlist 1/4 {
label var Iq_drought_`num' "Group `num'"
}
coefplot, drop(_cons) xline(0, lstyle(grey) lpattern(dash) lcolor(blue))  scheme(s1mono) title("") note("")
graph export "${output}/drought_intensity1.pdf", as(pdf) replace 

/* gdp */
* table 3
cap drop init
xtile init = log(gcp_ppp), nq(4)
tab init, g(gdp_)
foreach var of varlist gdp_* {
	gen `var'_d = i_DSPEIP*`var'
}
reg2hdfespatial  lpop gdp_*_d   , timevar(year) panelvar(id_gid) lat(latitude) lon(longitude) distcutoff(500) lagcutoff(100000) 
foreach num of numlist 1/4 {
label var gdp_`num'_d "Cell Income Quartile: `num'"
}
coefplot, drop(_cons) xline(0, lstyle(grey) lpattern(dash) lcolor(blue))  scheme(s1mono) title("") note("")
graph export "${output}/drought_gdp1.pdf", as(pdf) replace 


/* Results natural resources */
* table 4
replace mrds = max(mrds, 0)>0
gen resources = petroleum_s + diamsec_s + diamprim_s + goldplacer_s + goldvein_s + gem_s
replace resources = resources > 0 & !missing(resources)
cap drop a
gen a = DSPEIP*resources
eststo m0: reg2hdfespatial  lpop i_DSPEIP a, timevar(year) panelvar(id_gid) lat(latitude) lon(longitude) distcutoff(500) lagcutoff(100000) 
cap drop b
gen b = DSPEIP*mrds
eststo m1: reg2hdfespatial  lpop i_DSPEIP b, timevar(year) panelvar(id_gid) lat(latitude) lon(longitude) distcutoff(500) lagcutoff(100000) 
esttab m0 m1, title(Resource Availability) ///
		nonumbers mtitles("Mineral Resources" "MRDS Extraction Location") ///
		b(%5.3f) se(%5.3f) r2  starlevels({$^*$} 0.1 {$^**$} 0.05 {$^***$} 0.01) se  label noconst booktabs
drop a b

eststo clear


* travel time *
* table 5
cap drop a 
cap drop b
gen a = DSPEIP*ttime_mean
gen b = DSPEIP*urban_gc

eststo m0: reg2hdfespatial  lpop i_DSPEIP a, timevar(year) panelvar(id_gid) lat(latitude) lon(longitude) distcutoff(500) lagcutoff(100000) 
eststo m1: reg2hdfespatial  lpop i_DSPEIP b, timevar(year) panelvar(id_gid) lat(latitude) lon(longitude) distcutoff(500) lagcutoff(100000) 
esttab m0 m1, title(Urbanization) ///
		nonumbers mtitles("Closeness to Capital" "Urbanization") ///
		b(%5.3f) se(%5.3f) r2  starlevels({$^*$} 0.1 {$^**$} 0.05 {$^***$} 0.01) se  label noconst booktabs
eststo clear


cap log close
